/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * InterquartileRange.java
 * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
 */

package weka.filters.unsupervised.attribute;

import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.Capabilities.Capability;
import weka.filters.SimpleBatchFilter;

import java.util.Enumeration;
import java.util.Vector;

/**
 * <!-- globalinfo-start --> A filter for detecting outliers and extreme values
 * based on interquartile ranges. The filter skips the class attribute.<br/>
 * <br/> Outliers:<br/> Q3 + OF*IQR &lt; x &lt;= Q3 + EVF*IQR<br/> or<br/> Q1 -
 * EVF*IQR &lt;= x &lt; Q1 - OF*IQR<br/> <br/> Extreme values:<br/> x &gt; Q3 +
 * EVF*IQR<br/> or<br/> x &lt; Q1 - EVF*IQR<br/> <br/> Key:<br/> Q1 = 25%
 * quartile<br/> Q3 = 75% quartile<br/> IQR = Interquartile Range, difference
 * between Q1 and Q3<br/> OF = Outlier Factor<br/> EVF = Extreme Value Factor
 * <p/> <!-- globalinfo-end -->
 * 
 * <!-- options-start --> Valid options are: <p/>
 * 
 * <pre>
 *  -D
 *  Turns on output of debugging information.
 * </pre>
 * 
 * <pre>
 *  -R &lt;col1,col2-col4,...&gt;
 *  Specifies list of columns to base outlier/extreme value detection
 *  on. If an instance is considered in at least one of those
 *  attributes an outlier/extreme value, it is tagged accordingly.
 *  'first' and 'last' are valid indexes.
 *  (default none)
 * </pre>
 * 
 * <pre>
 *  -O &lt;num&gt;
 *  The factor for outlier detection.
 *  (default: 3)
 * </pre>
 * 
 * <pre>
 *  -E &lt;num&gt;
 *  The factor for extreme values detection.
 *  (default: 2*Outlier Factor)
 * </pre>
 * 
 * <pre>
 *  -E-as-O
 *  Tags extreme values also as outliers.
 *  (default: off)
 * </pre>
 * 
 * <pre>
 *  -P
 *  Generates Outlier/ExtremeValue pair for each numeric attribute in
 *  the range, not just a single indicator pair for all the attributes.
 *  (default: off)
 * </pre>
 * 
 * <pre>
 *  -M
 *  Generates an additional attribute 'Offset' per Outlier/ExtremeValue
 *  pair that contains the multiplier that the value is off the median.
 *     value = median + 'multiplier' * IQR
 * Note: implicitly sets '-P'. (default: off)
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * Thanks to Dale for a few brainstorming sessions.
 * 
 * @author Dale Fletcher (dale at cs dot waikato dot ac dot nz)
 * @author fracpete (fracpete at waikato dot ac dot nz)
 * @version $Revision: 1.3 $
 */
public class InterquartileRange extends SimpleBatchFilter {

	/** for serialization */
	private static final long serialVersionUID = -227879653639723030L;

	/**
	 * marker stored in {@link #m_AttributeIndices} for selected attributes
	 * that are skipped (the class attribute and non-numeric attributes)
	 */
	public final static int NON_NUMERIC = -1;

	/** the attribute range to work on */
	protected Range m_Attributes = new Range("first-last");

	/**
	 * the generated indices (only for performance reasons); entry i is the
	 * dataset index of the i-th selected attribute or {@link #NON_NUMERIC}
	 */
	protected int[] m_AttributeIndices = null;

	/** the factor for detecting outliers */
	protected double m_OutlierFactor = 3;

	/** the factor for detecting extreme values, by default 2*m_OutlierFactor */
	protected double m_ExtremeValuesFactor = 2 * m_OutlierFactor;

	/** whether extreme values are also tagged as outliers */
	protected boolean m_ExtremeValuesAsOutliers = false;

	/** the upper extreme value threshold (= Q3 + EVF*IQR) */
	protected double[] m_UpperExtremeValue = null;

	/** the upper outlier threshold (= Q3 + OF*IQR) */
	protected double[] m_UpperOutlier = null;

	/** the lower outlier threshold (= Q1 - OF*IQR) */
	protected double[] m_LowerOutlier = null;

	/** the interquartile range */
	protected double[] m_IQR = null;

	/** the median */
	protected double[] m_Median = null;

	/** the lower extreme value threshold (= Q1 - EVF*IQR) */
	protected double[] m_LowerExtremeValue = null;

	/**
	 * whether to generate Outlier/ExtremeValue attributes for each attribute
	 * instead of a general one
	 */
	protected boolean m_DetectionPerAttribute = false;

	/**
	 * the position of the outlier attribute(s) in the output format; a single
	 * entry in the general case, one entry per selected attribute when
	 * detection is done per attribute
	 */
	protected int[] m_OutlierAttributePosition = null;

	/**
	 * whether to add another attribute called "Offset", that lists the
	 * 'multiplier' by which the outlier/extreme value is away from the median,
	 * i.e., value = median + 'multiplier' * IQR <br/> automatically enables
	 * m_DetectionPerAttribute!
	 */
	protected boolean m_OutputOffsetMultiplier = false;

	/**
	 * Returns a string describing this filter
	 * 
	 * @return a description of the filter suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String globalInfo() {
		return "A filter for detecting outliers and extreme values based on "
				+ "interquartile ranges. The filter skips the class attribute.\n\n"
				+ "Outliers:\n"
				+ "  Q3 + OF*IQR < x <= Q3 + EVF*IQR\n"
				+ "  or\n"
				+ "  Q1 - EVF*IQR <= x < Q1 - OF*IQR\n"
				+ "\n"
				+ "Extreme values:\n"
				+ "  x > Q3 + EVF*IQR\n"
				+ "  or\n"
				+ "  x < Q1 - EVF*IQR\n"
				+ "\n"
				+ "Key:\n"
				+ "  Q1  = 25% quartile\n"
				+ "  Q3  = 75% quartile\n"
				+ "  IQR = Interquartile Range, difference between Q1 and Q3\n"
				+ "  OF  = Outlier Factor\n"
				+ "  EVF = Extreme Value Factor";
	}

	/**
	 * Returns an enumeration describing the available options.
	 * 
	 * @return an enumeration of all the available options.
	 */
	public Enumeration listOptions() {
		Vector result = new Vector();

		// options of the superclass come first
		Enumeration enm = super.listOptions();
		while (enm.hasMoreElements())
			result.addElement(enm.nextElement());

		result.addElement(new Option(
				"\tSpecifies list of columns to base outlier/extreme value detection\n"
						+ "\ton. If an instance is considered in at least one of those\n"
						+ "\tattributes an outlier/extreme value, it is tagged accordingly.\n"
						+ "\t'first' and 'last' are valid indexes.\n"
						+ "\t(default none)",
				"R", 1, "-R <col1,col2-col4,...>"));

		result.addElement(new Option(
				"\tThe factor for outlier detection.\n"
						+ "\t(default: 3)",
				"O", 1, "-O <num>"));

		result.addElement(new Option(
				"\tThe factor for extreme values detection.\n"
						+ "\t(default: 2*Outlier Factor)",
				"E", 1, "-E <num>"));

		result.addElement(new Option(
				"\tTags extreme values also as outliers.\n"
						+ "\t(default: off)",
				"E-as-O", 0, "-E-as-O"));

		result.addElement(new Option(
				"\tGenerates Outlier/ExtremeValue pair for each numeric attribute in\n"
						+ "\tthe range, not just a single indicator pair for all the attributes.\n"
						+ "\t(default: off)",
				"P", 0, "-P"));

		result.addElement(new Option(
				"\tGenerates an additional attribute 'Offset' per Outlier/ExtremeValue\n"
						+ "\tpair that contains the multiplier that the value is off the median.\n"
						+ "\t   value = median + 'multiplier' * IQR\n"
						+ "\tNote: implicitly sets '-P'.\n"
						+ "\t(default: off)",
				"M", 0, "-M"));

		return result.elements();
	}

	/**
	 * Parses a list of options for this object. <p/>
	 * 
	 * <!-- options-start --> Valid options are: <p/>
	 * 
	 * <pre>
	 *  -D
	 *  Turns on output of debugging information.
	 * </pre>
	 * 
	 * <pre>
	 *  -R &lt;col1,col2-col4,...&gt;
	 *  Specifies list of columns to base outlier/extreme value detection
	 *  on. If an instance is considered in at least one of those
	 *  attributes an outlier/extreme value, it is tagged accordingly.
	 *  'first' and 'last' are valid indexes.
	 *  (default none)
	 * </pre>
	 * 
	 * <pre>
	 *  -O &lt;num&gt;
	 *  The factor for outlier detection.
	 *  (default: 3)
	 * </pre>
	 * 
	 * <pre>
	 *  -E &lt;num&gt;
	 *  The factor for extreme values detection.
	 *  (default: 2*Outlier Factor)
	 * </pre>
	 * 
	 * <pre>
	 *  -E-as-O
	 *  Tags extreme values also as outliers.
	 *  (default: off)
	 * </pre>
	 * 
	 * <pre>
	 *  -P
	 *  Generates Outlier/ExtremeValue pair for each numeric attribute in
	 *  the range, not just a single indicator pair for all the attributes.
	 *  (default: off)
	 * </pre>
	 * 
	 * <pre>
	 *  -M
	 *  Generates an additional attribute 'Offset' per Outlier/ExtremeValue
	 *  pair that contains the multiplier that the value is off the median.
	 *     value = median + 'multiplier' * IQR
	 *  Note: implicitly sets '-P'.
	 *  (default: off)
	 * </pre>
	 * 
	 * <!-- options-end -->
	 * 
	 * The two factors are validated against each other by their setters
	 * (outlier factor &lt; extreme values factor). They are therefore applied
	 * in an order that lets every consistent -O/-E pair through, regardless
	 * of the values that were set before.
	 * 
	 * @param options
	 *            the list of options as an array of strings
	 * @throws Exception
	 *             if an option is not supported
	 */
	public void setOptions(String[] options) throws Exception {
		String tmpStr;
		double outlierFactor;
		double extremeValuesFactor;

		super.setOptions(options);

		tmpStr = Utils.getOption("R", options);
		if (tmpStr.length() != 0)
			setAttributeIndices(tmpStr);
		else
			setAttributeIndices("first-last");

		tmpStr = Utils.getOption("O", options);
		if (tmpStr.length() != 0)
			outlierFactor = Double.parseDouble(tmpStr);
		else
			outlierFactor = 3;

		tmpStr = Utils.getOption("E", options);
		if (tmpStr.length() != 0)
			extremeValuesFactor = Double.parseDouble(tmpStr);
		else
			extremeValuesFactor = 2 * outlierFactor;

		if (outlierFactor < getExtremeValuesFactor()) {
			// new lower factor fits below the current upper factor
			setOutlierFactor(outlierFactor);
			setExtremeValuesFactor(extremeValuesFactor);
		} else {
			// the upper factor has to be raised first, otherwise the setter
			// would reject the new (larger) outlier factor
			setExtremeValuesFactor(extremeValuesFactor);
			setOutlierFactor(outlierFactor);
		}

		setExtremeValuesAsOutliers(Utils.getFlag("E-as-O", options));

		setDetectionPerAttribute(Utils.getFlag("P", options));

		// must come after -P, since -M switches per-attribute detection on
		setOutputOffsetMultiplier(Utils.getFlag("M", options));
	}

	/**
	 * Gets the current settings of the filter.
	 * 
	 * @return an array of strings suitable for passing to setOptions
	 */
	public String[] getOptions() {
		Vector result = new Vector();

		String[] options = super.getOptions();
		for (int i = 0; i < options.length; i++)
			result.add(options[i]);

		result.add("-R");
		if (!getAttributeIndices().equals(""))
			result.add(getAttributeIndices());
		else
			result.add("first-last");

		result.add("-O");
		result.add("" + getOutlierFactor());

		result.add("-E");
		result.add("" + getExtremeValuesFactor());

		if (getExtremeValuesAsOutliers())
			result.add("-E-as-O");

		if (getDetectionPerAttribute())
			result.add("-P");

		if (getOutputOffsetMultiplier())
			result.add("-M");

		return (String[]) result.toArray(new String[result.size()]);
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String attributeIndicesTipText() {
		return "Specify range of attributes to act on; "
				+ " this is a comma separated list of attribute indices, with"
				+ " \"first\" and \"last\" valid values; specify an inclusive"
				+ " range with \"-\", eg: \"first-3,5,6-10,last\".";
	}

	/**
	 * Gets the current range selection
	 * 
	 * @return a string containing a comma separated list of ranges
	 */
	public String getAttributeIndices() {
		return m_Attributes.getRanges();
	}

	/**
	 * Sets which attributes are to be used for interquartile calculations and
	 * outlier/extreme value detection (only numeric attributes among the
	 * selection will be used).
	 * 
	 * @param value
	 *            a string representing the list of attributes. Since the string
	 *            will typically come from a user, attributes are indexed from
	 *            1. <br>
	 *            eg: first-3,5,6-last
	 * @throws IllegalArgumentException
	 *             if an invalid range list is supplied
	 */
	public void setAttributeIndices(String value) {
		m_Attributes.setRanges(value);
	}

	/**
	 * Sets which attributes are to be used for interquartile calculations and
	 * outlier/extreme value detection (only numeric attributes among the
	 * selection will be used).
	 * 
	 * @param value
	 *            an array containing indexes of attributes to work on. Since
	 *            the array will typically come from a program, attributes are
	 *            indexed from 0.
	 * @throws IllegalArgumentException
	 *             if an invalid set of ranges is supplied
	 */
	public void setAttributeIndicesArray(int[] value) {
		setAttributeIndices(Range.indicesToRangeList(value));
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String outlierFactorTipText() {
		return "The factor for determining the thresholds for outliers.";
	}

	/**
	 * Sets the factor for determining the thresholds for outliers. A value
	 * that is not smaller than the current extreme values factor is rejected:
	 * a message is printed to stderr and the old factor is kept.
	 * 
	 * @param value
	 *            the factor.
	 */
	public void setOutlierFactor(double value) {
		if (value >= getExtremeValuesFactor())
			System.err
					.println("OutlierFactor must be smaller than ExtremeValueFactor");
		else
			m_OutlierFactor = value;
	}

	/**
	 * Gets the factor for determining the thresholds for outliers.
	 * 
	 * @return the factor.
	 */
	public double getOutlierFactor() {
		return m_OutlierFactor;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String extremeValuesFactorTipText() {
		return "The factor for determining the thresholds for extreme values.";
	}

	/**
	 * Sets the factor for determining the thresholds for extreme values. A
	 * value that is not greater than the current outlier factor is rejected:
	 * a message is printed to stderr and the old factor is kept.
	 * 
	 * @param value
	 *            the factor.
	 */
	public void setExtremeValuesFactor(double value) {
		if (value <= getOutlierFactor())
			System.err
					.println("ExtremeValuesFactor must be greater than OutlierFactor!");
		else
			m_ExtremeValuesFactor = value;
	}

	/**
	 * Gets the factor for determining the thresholds for extreme values.
	 * 
	 * @return the factor.
	 */
	public double getExtremeValuesFactor() {
		return m_ExtremeValuesFactor;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String extremeValuesAsOutliersTipText() {
		return "Whether to tag extreme values also as outliers.";
	}

	/**
	 * Set whether extreme values are also tagged as outliers.
	 * 
	 * @param value
	 *            whether or not to tag extreme values also as outliers.
	 */
	public void setExtremeValuesAsOutliers(boolean value) {
		m_ExtremeValuesAsOutliers = value;
	}

	/**
	 * Get whether extreme values are also tagged as outliers.
	 * 
	 * @return true if extreme values are also tagged as outliers.
	 */
	public boolean getExtremeValuesAsOutliers() {
		return m_ExtremeValuesAsOutliers;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String detectionPerAttributeTipText() {
		return "Generates Outlier/ExtremeValue attribute pair for each numeric "
				+ "attribute, not just a single pair for all numeric attributes together.";
	}

	/**
	 * Set whether an Outlier/ExtremeValue attribute pair is generated for each
	 * numeric attribute ("true") or just one pair for all numeric attributes
	 * together ("false"). Turning this off also turns off the output of the
	 * offset multiplier, which is only available per attribute.
	 * 
	 * @param value
	 *            whether or not to generate indicator attribute pairs for each
	 *            numeric attribute.
	 */
	public void setDetectionPerAttribute(boolean value) {
		m_DetectionPerAttribute = value;
		if (!m_DetectionPerAttribute)
			m_OutputOffsetMultiplier = false;
	}

	/**
	 * Gets whether an Outlier/ExtremeValue attribute pair is generated for each
	 * numeric attribute ("true") or just one pair for all numeric attributes
	 * together ("false").
	 * 
	 * @return true if indicator attribute pairs are generated for each numeric
	 *         attribute.
	 */
	public boolean getDetectionPerAttribute() {
		return m_DetectionPerAttribute;
	}

	/**
	 * Returns the tip text for this property
	 * 
	 * @return tip text for this property suitable for displaying in the
	 *         explorer/experimenter gui
	 */
	public String outputOffsetMultiplierTipText() {
		return "Generates an additional attribute 'Offset' that contains the "
				+ "multiplier the value is off the median: "
				+ "value = median + 'multiplier' * IQR";
	}

	/**
	 * Set whether an additional attribute "Offset" is generated per
	 * Outlier/ExtremeValue attribute pair that lists the multiplier the value
	 * is off the median: value = median + 'multiplier' * IQR. Turning this on
	 * also turns on the detection per attribute.
	 * 
	 * @param value
	 *            whether or not to generate the additional attribute.
	 */
	public void setOutputOffsetMultiplier(boolean value) {
		m_OutputOffsetMultiplier = value;
		if (m_OutputOffsetMultiplier)
			m_DetectionPerAttribute = true;
	}

	/**
	 * Gets whether an additional attribute "Offset" is generated per
	 * Outlier/ExtremeValue attribute pair that lists the multiplier the value
	 * is off the median: value = median + 'multiplier' * IQR.
	 * 
	 * @return true if the additional attribute is generated.
	 */
	public boolean getOutputOffsetMultiplier() {
		return m_OutputOffsetMultiplier;
	}

	/**
	 * Returns the Capabilities of this filter.
	 * 
	 * @return the capabilities of this object
	 * @see Capabilities
	 */
	public Capabilities getCapabilities() {
		Capabilities result = super.getCapabilities();

		// attributes
		result.enableAllAttributes();
		result.enable(Capability.MISSING_VALUES);

		// class
		result.enableAllClasses();
		result.enable(Capability.MISSING_CLASS_VALUES);
		result.enable(Capability.NO_CLASS);

		return result;
	}

	/**
	 * creates a nominal indicator attribute with the labels "no" (index 0) and
	 * "yes" (index 1)
	 * 
	 * @param name
	 *            the name of the attribute
	 * @return the generated attribute
	 */
	private static Attribute createIndicatorAttribute(String name) {
		FastVector labels = new FastVector();
		labels.addElement("no");
		labels.addElement("yes");
		return new Attribute(name, labels);
	}

	/**
	 * Determines the output format based on the input format and returns this.
	 * In case the output format cannot be returned immediately, i.e.,
	 * hasImmediateOutputFormat() returns false, then this method will be
	 * called from batchFinished() after the call of preprocess(Instances), in
	 * which, e.g., statistics for the actual processing step can be gathered.
	 * <p/>
	 * The output consists of all input attributes followed by either a single
	 * Outlier/ExtremeValue pair or, with detection per attribute, one
	 * "name_Outlier"/"name_ExtremeValue" pair (plus "name_Offset" if requested)
	 * for each selected numeric attribute. As a side effect the selected
	 * indices and the positions of the new attributes are stored.
	 * 
	 * @param inputFormat
	 *            the input format to base the output format on
	 * @return the output format
	 * @throws Exception
	 *             in case the determination goes wrong
	 * @see #hasImmediateOutputFormat()
	 * @see #batchFinished()
	 */
	protected Instances determineOutputFormat(Instances inputFormat)
			throws Exception {

		FastVector atts;
		Instances result;
		int i;

		// resolve the selection; class and non-numeric attributes get masked
		m_Attributes.setUpper(inputFormat.numAttributes() - 1);
		m_AttributeIndices = m_Attributes.getSelection();
		for (i = 0; i < m_AttributeIndices.length; i++) {
			// ignore class
			if (m_AttributeIndices[i] == inputFormat.classIndex()) {
				m_AttributeIndices[i] = NON_NUMERIC;
				continue;
			}
			// not numeric -> ignore it
			if (!inputFormat.attribute(m_AttributeIndices[i]).isNumeric())
				m_AttributeIndices[i] = NON_NUMERIC;
		}

		// get old attributes
		atts = new FastVector();
		for (i = 0; i < inputFormat.numAttributes(); i++)
			atts.addElement(inputFormat.attribute(i));

		if (!getDetectionPerAttribute()) {
			// one indicator pair for all attributes together
			m_OutlierAttributePosition = new int[1];
			m_OutlierAttributePosition[0] = atts.size();

			atts.addElement(createIndicatorAttribute("Outlier"));
			atts.addElement(createIndicatorAttribute("ExtremeValue"));
		} else {
			m_OutlierAttributePosition = new int[m_AttributeIndices.length];

			for (i = 0; i < m_AttributeIndices.length; i++) {
				if (m_AttributeIndices[i] == NON_NUMERIC)
					continue;

				String name = inputFormat.attribute(m_AttributeIndices[i])
						.name();

				// the ExtremeValue (and Offset) attribute directly follow
				// this position
				m_OutlierAttributePosition[i] = atts.size();

				atts.addElement(createIndicatorAttribute(name + "_Outlier"));
				atts.addElement(createIndicatorAttribute(name
						+ "_ExtremeValue"));

				if (getOutputOffsetMultiplier())
					atts.addElement(new Attribute(name + "_Offset"));
			}
		}

		// generate header
		result = new Instances(inputFormat.relationName(), atts, 0);
		result.setClassIndex(inputFormat.classIndex());

		return result;
	}

	/**
	 * returns the given values without the missing ones
	 * 
	 * @param values
	 *            the values to filter
	 * @return the non-missing values in their original order; the input array
	 *         itself if it contains no missing values
	 */
	private static double[] removeMissing(double[] values) {
		int i;
		int count = 0;

		for (i = 0; i < values.length; i++) {
			if (!Instance.isMissingValue(values[i]))
				count++;
		}

		if (count == values.length)
			return values;

		double[] result = new double[count];
		int pos = 0;
		for (i = 0; i < values.length; i++) {
			if (!Instance.isMissingValue(values[i]))
				result[pos++] = values[i];
		}

		return result;
	}

	/**
	 * computes the thresholds for outliers and extreme values for every
	 * selected numeric attribute. The median is the middle value (or the mean
	 * of the two middle values), Q1/Q3 are the medians of the lower/upper half
	 * of the sorted values (the middle value is left out of both halves if the
	 * number of values is odd). Missing values are ignored. If an attribute
	 * has no values at all, its thresholds are NaN and hence no value will be
	 * flagged for it.
	 * 
	 * @param instances
	 *            the data to work on
	 */
	protected void computeThresholds(Instances instances) {
		int numSelected = m_AttributeIndices.length;

		m_UpperExtremeValue = new double[numSelected];
		m_UpperOutlier = new double[numSelected];
		m_LowerOutlier = new double[numSelected];
		m_LowerExtremeValue = new double[numSelected];
		m_Median = new double[numSelected];
		m_IQR = new double[numSelected];

		for (int i = 0; i < numSelected; i++) {
			// non-numeric attribute?
			if (m_AttributeIndices[i] == NON_NUMERIC)
				continue;

			double[] values = removeMissing(instances
					.attributeToDoubleArray(m_AttributeIndices[i]));
			int n = values.length;

			// nothing to compute quartiles from
			if (n == 0) {
				m_Median[i] = Double.NaN;
				m_IQR[i] = Double.NaN;
				m_UpperExtremeValue[i] = Double.NaN;
				m_UpperOutlier[i] = Double.NaN;
				m_LowerOutlier[i] = Double.NaN;
				m_LowerExtremeValue[i] = Double.NaN;
				continue;
			}

			// sort attribute data
			int[] sortedIndices = Utils.sort(values);

			// size of each half and position of the quartile within a half
			int half = n / 2;
			int quarter = half / 2;

			double q1;
			double q2;
			double q3;

			if (n % 2 == 1) {
				q2 = values[sortedIndices[half]];
			} else {
				// the two middle values are at half-1 and half (0-based)
				q2 = (values[sortedIndices[half - 1]] + values[sortedIndices[half]]) / 2;
			}

			if (half == 0) {
				// single value: the halves are empty
				q1 = q2;
				q3 = q2;
			} else if (half % 2 == 1) {
				q1 = values[sortedIndices[quarter]];
				q3 = values[sortedIndices[n - quarter - 1]];
			} else {
				// positions of Q3 mirror the ones of Q1 at the upper end
				q1 = (values[sortedIndices[quarter - 1]] + values[sortedIndices[quarter]]) / 2;
				q3 = (values[sortedIndices[n - quarter - 1]] + values[sortedIndices[n
						- quarter]]) / 2;
			}

			// determine thresholds and other values
			m_Median[i] = q2;
			m_IQR[i] = q3 - q1;
			m_UpperExtremeValue[i] = q3 + getExtremeValuesFactor() * m_IQR[i];
			m_UpperOutlier[i] = q3 + getOutlierFactor() * m_IQR[i];
			m_LowerOutlier[i] = q1 - getOutlierFactor() * m_IQR[i];
			m_LowerExtremeValue[i] = q1 - getExtremeValuesFactor() * m_IQR[i];
		}
	}

	/**
	 * returns whether the instance has an outlier in the specified attribute or
	 * not
	 * 
	 * @param inst
	 *            the instance to test
	 * @param index
	 *            the position of the attribute within the selected attributes
	 *            (i.e., an index for m_AttributeIndices and the threshold
	 *            arrays, not the index of the attribute in the dataset)
	 * @return true if the instance is an outlier
	 */
	protected boolean isOutlier(Instance inst, int index) {
		double value = inst.value(m_AttributeIndices[index]);

		boolean upper = (m_UpperOutlier[index] < value)
				&& (value <= m_UpperExtremeValue[index]);
		boolean lower = (m_LowerExtremeValue[index] <= value)
				&& (value < m_LowerOutlier[index]);

		return upper || lower;
	}

	/**
	 * returns whether the instance is an outlier or not, i.e., whether it has
	 * an outlier in at least one of the selected numeric attributes
	 * 
	 * @param inst
	 *            the instance to test
	 * @return true if the instance is an outlier
	 */
	protected boolean isOutlier(Instance inst) {
		for (int i = 0; i < m_AttributeIndices.length; i++) {
			// non-numeric attribute?
			if (m_AttributeIndices[i] == NON_NUMERIC)
				continue;

			// the per-attribute check expects the position in the selection
			if (isOutlier(inst, i))
				return true;
		}

		return false;
	}

	/**
	 * returns whether the instance has an extreme value in the specified
	 * attribute or not
	 * 
	 * @param inst
	 *            the instance to test
	 * @param index
	 *            the position of the attribute within the selected attributes
	 *            (i.e., an index for m_AttributeIndices and the threshold
	 *            arrays, not the index of the attribute in the dataset)
	 * @return true if the instance is an extreme value
	 */
	protected boolean isExtremeValue(Instance inst, int index) {
		double value = inst.value(m_AttributeIndices[index]);

		return (value > m_UpperExtremeValue[index])
				|| (value < m_LowerExtremeValue[index]);
	}

	/**
	 * returns whether the instance is an extreme value or not, i.e., whether
	 * it has an extreme value in at least one of the selected numeric
	 * attributes
	 * 
	 * @param inst
	 *            the instance to test
	 * @return true if the instance is an extreme value
	 */
	protected boolean isExtremeValue(Instance inst) {
		for (int i = 0; i < m_AttributeIndices.length; i++) {
			// non-numeric attribute?
			if (m_AttributeIndices[i] == NON_NUMERIC)
				continue;

			// the per-attribute check expects the position in the selection
			if (isExtremeValue(inst, i))
				return true;
		}

		return false;
	}

	/**
	 * returns the multiplier of the IQR the instance is off the median for
	 * this particular attribute. The result is not finite if the IQR of the
	 * attribute is 0.
	 * 
	 * @param inst
	 *            the instance to test
	 * @param index
	 *            the position of the attribute within the selected attributes
	 *            (i.e., an index for m_AttributeIndices and the threshold
	 *            arrays, not the index of the attribute in the dataset)
	 * @return the multiplier
	 */
	protected double calculateMultiplier(Instance inst, int index) {
		double value = inst.value(m_AttributeIndices[index]);

		return (value - m_Median[index]) / m_IQR[index];
	}

	/**
	 * Processes the given data (may change the provided dataset) and returns
	 * the modified version. This method is called in batchFinished(). The
	 * thresholds are computed from the data as long as the first batch has not
	 * been finished; afterwards the stored thresholds are applied. Each
	 * instance is copied and the indicator attributes (and the offset, if
	 * requested) are filled in.
	 * 
	 * @param instances
	 *            the data to process
	 * @return the modified data
	 * @throws Exception
	 *             in case the processing goes wrong
	 * @see #batchFinished()
	 */
	protected Instances process(Instances instances) throws Exception {
		Instances result;
		Instance instOld;
		Instance instNew;
		int i;
		int n;
		double[] values;
		int numAttNew;
		int numAttOld;

		if (!isFirstBatchDone())
			computeThresholds(instances);

		result = getOutputFormat();
		numAttOld = instances.numAttributes();
		numAttNew = result.numAttributes();

		for (n = 0; n < instances.numInstances(); n++) {
			instOld = instances.instance(n);

			// old values first, the new attributes start out as 0 ("no")
			values = new double[numAttNew];
			System.arraycopy(instOld.toDoubleArray(), 0, values, 0, numAttOld);

			// generate new instance
			instNew = new Instance(1.0, values);
			instNew.setDataset(result);

			// per attribute?
			if (!getDetectionPerAttribute()) {
				// outlier?
				if (isOutlier(instOld))
					instNew.setValue(m_OutlierAttributePosition[0], 1);
				// extreme value?
				if (isExtremeValue(instOld)) {
					instNew.setValue(m_OutlierAttributePosition[0] + 1, 1);
					// tag extreme values also as outliers?
					if (getExtremeValuesAsOutliers())
						instNew.setValue(m_OutlierAttributePosition[0], 1);
				}
			} else {
				for (i = 0; i < m_AttributeIndices.length; i++) {
					// non-numeric attribute?
					if (m_AttributeIndices[i] == NON_NUMERIC)
						continue;

					// note: the checks take the position in the selection,
					// not the attribute index in the dataset

					// outlier?
					if (isOutlier(instOld, i))
						instNew.setValue(m_OutlierAttributePosition[i], 1);
					// extreme value?
					if (isExtremeValue(instOld, i)) {
						instNew.setValue(m_OutlierAttributePosition[i] + 1, 1);
						// tag extreme values also as outliers?
						if (getExtremeValuesAsOutliers())
							instNew.setValue(m_OutlierAttributePosition[i], 1);
					}
					// add multiplier?
					if (getOutputOffsetMultiplier())
						instNew.setValue(m_OutlierAttributePosition[i] + 2,
								calculateMultiplier(instOld, i));
				}
			}

			// copy possible strings, relational values...
			copyValues(instNew, false, instOld.dataset(), getOutputFormat());

			// add to output
			result.add(instNew);
		}

		return result;
	}

	/**
	 * Returns the revision string.
	 * 
	 * @return the revision
	 */
	public String getRevision() {
		return RevisionUtils.extract("$Revision: 1.3 $");
	}

	/**
	 * Main method for testing this class.
	 * 
	 * @param args
	 *            should contain arguments to the filter: use -h for help
	 */
	public static void main(String[] args) {
		runFilter(new InterquartileRange(), args);
	}
}
